## *************************************
##          B5 Lead Models       
##        Hao Zhang & Ye Zhang
##             2025/7/24
## *************************************

# load packages
library(lfe)
library(stargazer)
library(tidyverse)
library(readstata13)

# load data
rm(list = ls())
load("../analysis data/firm analysis.RData")

# load city data
setwd("../raw data")
city <- read.dta13("city panel 1995-2011.dta")
city <- city %>% filter(substr(as.character(city$citycode), 3, 4) != "01")
city <- city %>% mutate(province = as.integer(citycode/100)) %>% 
  filter(province != 11, province != 12, province != 31, citycode != 5102) %>%
  group_by(province, year) %>%
  mutate(gdpgrowth_mean = mean(gdpgrowth, na.rm = TRUE)) %>% 
  mutate(year = year + 1) %>% filter(gdpgrowth <= 0.5) %>% 
  mutate(gdp_pressure = gdpgrowth_mean - gdpgrowth) %>% 
  select(citycode, year, gdp_pressure)

# create leads (Note that the original treatment is already lagged for one year)
library(dplyr)
city <- city %>% group_by(citycode) %>%
  mutate(gdp_pressure_lead1 = dplyr::lead(gdp_pressure, n = 2, default = NA),
         gdp_pressure_lead2 = dplyr::lead(gdp_pressure, n = 3, default = NA)) %>%
  select(-gdp_pressure) %>% select(-province)

# merge data
firm <- left_join(firm, city, by = c("citycode", "year"))

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# unemployment insurance
m1 <- felm(insurance_dummy ~ gdp_pressure + gdp_pressure_lead1 + gdp_pressure_lead2 + ihs(employment) + 
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

firm1 <- firm %>% filter(rate1 > 0)
m2 <- felm(log(rate1) ~ gdp_pressure + gdp_pressure_lead1 + gdp_pressure_lead2 + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm1)

# medical insurance
m3 <- felm(medical_dummy ~ gdp_pressure + gdp_pressure_lead1 + gdp_pressure_lead2 + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

firm2 <- firm %>% filter(rate2 > 0)
m4 <- felm(log(rate2) ~ gdp_pressure + gdp_pressure_lead1 + gdp_pressure_lead2 + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm2)

# output regression table
stargazer(m1, m2, m3, m4, title = "Interjurisdictional Competition and Social Insurances (Distributive Leads)", 
          dep.var.labels = c("Unemployment", "Unemployment", "Health and Pension", "Health and Pension"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y", "Y"),
                           c("Firm Characteristics", "Y", "Y", "Y", "Y"), 
                           c("Firm Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))
